In [1]:
import mxnet as mx
import numpy as np
from mxnet import gluon
from tqdm import tqdm
In [2]:
data_ctx = mx.cpu()
model_ctx = mx.cpu()
In [3]:
batch_size = 64
num_inputs = 784
num_outputs = 10
num_examples = 60000
In [4]:
def transform(data, label):
    # Scale pixel values to [0, 1] and cast labels to float32
    return data.astype(np.float32) / 255, label.astype(np.float32)
In [5]:
train_data = gluon.data.DataLoader(dataset=gluon.data.vision.MNIST(train=True, transform=transform),
                                   batch_size=batch_size,
                                   shuffle=True)
test_data = gluon.data.DataLoader(dataset=gluon.data.vision.MNIST(train=False, transform=transform),
                                  batch_size=batch_size,
                                  shuffle=False)
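A quick sanity check, not part of the original cells: pulling a single batch from the loader confirms the shapes the network will see. With the default gluon MNIST dataset each image arrives as 28x28x1, so a batch should be (64, 28, 28, 1) with (64,) labels.
# Hypothetical sanity check (assumes the loaders defined above):
for data, label in train_data:
    print(data.shape, label.shape)  # expect (64, 28, 28, 1) and (64,)
    break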
In [6]:
class MLP(gluon.Block):
    def __init__(self, **kwargs):
        super(MLP, self).__init__(**kwargs)
        with self.name_scope():
            self.dense0 = gluon.nn.Dense(64)
            self.dense1 = gluon.nn.Dense(64)
            self.dense2 = gluon.nn.Dense(10)

    def forward(self, x):
        x = mx.nd.relu(self.dense0(x))
        x = mx.nd.relu(self.dense1(x))
        x = self.dense2(x)
        return x
In [7]:
net = MLP()
net.collect_params().initialize(mx.init.Normal(sigma=.01),
                                ctx=model_ctx)
In [8]:
# Dummy input used to probe the network's forward pass
data = mx.nd.ones(shape=[1, 784])
In [9]:
class MLP(gluon.Block):
    def __init__(self, **kwargs):
        super(MLP, self).__init__(**kwargs)
        with self.name_scope():
            self.dense0 = gluon.nn.Dense(units=64, activation="relu")
            self.dense1 = gluon.nn.Dense(units=64, activation="relu")
            self.dense2 = gluon.nn.Dense(units=10)

    def forward(self, x):
        x = self.dense0(x)
        print("-" * 70)
        print("Hidden Representation 1: %s" % x)
        x = self.dense1(x)
        print("-" * 70)
        print("Hidden Representation 2: %s" % x)
        x = self.dense2(x)
        print("-" * 70)
        print("Network output: %s" % x)
        print("-" * 70)
        return x

net = MLP()
net.collect_params().initialize(mx.init.Normal(sigma=.01), ctx=model_ctx)
net(data.as_in_context(model_ctx))
Out[9]:
In [10]:
num_hidden = 64
In [11]:
# Defining a sequential model
net = gluon.nn.Sequential()
with net.name_scope():
    net.add(gluon.nn.Dense(units=num_hidden,
                           activation="relu"))
    net.add(gluon.nn.Dense(units=num_hidden,
                           activation="relu"))
    net.add(gluon.nn.Dense(units=num_outputs))
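The Sequential container simply chains the added blocks in order, so this network is equivalent to the MLP class defined earlier. Printing it is a cheap way to inspect the layer stack; the input dimensions typically show up as None until the first forward pass triggers shape inference.
# Optional: inspect the layer stack before initialization
print(net)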
In [12]:
# Parameter initialization
net.collect_params().initialize(mx.init.Normal(sigma=.1),
                                ctx=model_ctx)
In [13]:
# Softmax cross-entropy
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()
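As a rough illustration (these tensors are made up, not from the notebook), the loss takes raw logits and integer class labels and returns one loss value per example; the softmax is applied internally, which is numerically more stable than computing it separately.
# Illustrative only -- dummy logits and labels:
dummy_output = mx.nd.random.normal(shape=(3, 10))
dummy_label = mx.nd.array([0, 1, 9])
print(softmax_cross_entropy(dummy_output, dummy_label))  # one loss per example, shape (3,)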
In [14]:
# Optimizer
trainer = gluon.Trainer(params=net.collect_params(),
                        optimizer='sgd',
                        optimizer_params={'learning_rate': 0.01})
In [15]:
def evaluate_accuracy(data_iterator, net):
    acc = mx.metric.Accuracy()
    for i, (data, label) in enumerate(data_iterator):
        data = data.as_in_context(model_ctx).reshape((-1, 784))
        label = label.as_in_context(model_ctx)
        output = net(data)
        predictions = mx.nd.argmax(data=output, axis=1)
        # Updating accuracy metric
        acc.update(preds=predictions, labels=label)
    return acc.get()[1]
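Before training it can be worth checking that the untrained network sits near chance level; with ten balanced classes, accuracy should be roughly 0.1, the exact value depending on the random initialization.
# Optional pre-training check (not an original cell):
evaluate_accuracy(test_data, net)  # ~0.10 for a randomly initialized model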
In [16]:
epochs = 10
smoothing_constant = .01  # not used in the loop below
In [17]:
for e in tqdm(range(epochs)):
    cumulative_loss = 0
    for i, (data, label) in enumerate(train_data):
        data = data.as_in_context(model_ctx).reshape((-1, 784))
        label = label.as_in_context(model_ctx)
        with mx.autograd.record():
            output = net(data)
            loss = softmax_cross_entropy(output, label)
        loss.backward()
        trainer.step(data.shape[0])
        cumulative_loss += mx.nd.sum(loss).asscalar()

    test_accuracy = evaluate_accuracy(test_data, net)
    train_accuracy = evaluate_accuracy(train_data, net)
    print("Epoch %s. Loss: %s, Train_acc %s, Test_acc %s" %
          (e, cumulative_loss / num_examples, train_accuracy, test_accuracy))
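A minimal prediction sketch, assuming the trained net from above: take one test batch, run it through the model, and compare the argmax predictions against the true labels.
# Hypothetical usage example (not an original cell):
for data, label in test_data:
    data = data.as_in_context(model_ctx).reshape((-1, 784))
    predictions = mx.nd.argmax(net(data), axis=1)
    print("predicted:", predictions[:10].asnumpy())
    print("actual:   ", label[:10].asnumpy())
    break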
In [18]:
train_accuracy
Out[18]:
In [19]:
test_accuracy
Out[19]: